library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
library(RSQLite)
library(devtools)
## Loading required package: usethis
library(ggpubr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble  3.1.5     ✓ purrr   0.3.4
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(knitr)

Summary of how fantasy football works: people draft teams from a pool of real NFL players. Points are gained on a weekly basis depending on how those players perform. Point system:

Passing:

# Raw passing/rushing/receiving data; the export runs through November 29, 2021.
# According to the original note, the file starts with the 2019 season and
# continues up to that cut-off.
df <- read.csv(file = "~/Downloads/nfl_pass_rush_receive_raw_data (1).csv")
# Hand-entered team standings. Each vector is one column of team_records and
# position i in every vector refers to the same team.
team <- c(
  "ARI", "ATL", "BAL", "BUF", "CAR", "CHI", "CIN", "CLE",
  "DAL", "DEN", "DET", "GNB", "HOU", "IND", "JAX", "KAN",
  "LVR", "LAC", "LAR", "MIA", "MIN", "NWE", "NOR", "NYG",
  "NYJ", "PHI", "PIT", "SFO", "SEA", "TAM", "TEN", "WAS"
)
wins <- c(
  9, 5, 8, 7, 5, 4, 7, 6,
  7, 6, 0, 9, 2, 6, 2, 7,
  6, 6, 7, 5, 5, 8, 5, 4,
  3, 5, 5, 6, 3, 8, 8, 5
)
losses <- c(
  2, 6, 3, 4, 7, 7, 4, 6,
  4, 5, 10, 3, 9, 6, 9, 4,
  5, 5, 4, 7, 6, 4, 6, 7,
  8, 7, 5, 5, 8, 3, 4, 6
)
ties <- c(
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 1, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 1, 0, 0, 0, 0, 0
)
points_for <- c(
  310, 199, 263, 326, 236, 179, 309, 254,
  326, 228, 174, 283, 164, 340, 173, 281,
  259, 273, 299, 234, 281, 336, 257, 202,
  199, 304, 224, 280, 209, 347, 304, 229
)
points_against <- c(
  202, 302, 240, 182, 253, 254, 226, 267,
  250, 196, 289, 242, 292, 283, 283, 250,
  295, 293, 263, 279, 276, 190, 249, 253,
  334, 273, 267, 248, 226, 253, 290, 282
)
# Column names are taken from the vector names, so keep the arguments unnamed.
team_records <- data.frame(team, wins, losses, ties, points_for, points_against)
# Fantasy points (Total_DKP) summed over every row of df, per scoring team.
# NOTE(review): the sums cover all rows of df. If df spans several seasons
# (as the load comment suggests) while team_records holds one season's
# standings, confirm that mixing the two is intended.
Fantasy_PTS_By_Team <- sqldf("select team, sum(Total_DKP)
                            from df 
                            group by team") %>%
  rename("Total_DKP" = "sum(Total_DKP)")

# Standings with each team's own fantasy-point total attached.
Combined_Team_Stats <- sqldf("select team_records.team, team_records.wins, team_records.losses, team_records.ties, team_records.points_for, team_records.points_against, Fantasy_PTS_By_Team.Total_DKP
                           from team_records
                           left join Fantasy_PTS_By_Team on team_records.team = Fantasy_PTS_By_Team.team")

# Fantasy points summed per opponent, i.e. the points each team allowed.
Fantasy_PTS_Against_Team <- sqldf("select Opponent_abbrev, sum(Total_DKP)
                                 from df 
                                 group by Opponent_abbrev") %>%
  rename(
    "Total_DKP" = "sum(Total_DKP)",
    "team" = "Opponent_abbrev"
  )

# Standings with the fantasy points allowed attached.
Combined_Points_Against <- sqldf("select team_records.team, team_records.wins, team_records.losses, team_records.ties, team_records.points_for, team_records.points_against, Fantasy_PTS_Against_Team.Total_DKP
                                from team_records
                                left join Fantasy_PTS_Against_Team on team_records.team = Fantasy_PTS_Against_Team.team")
# Team-level scatter plots, each with a fitted linear trend and its
# confidence band.

# Fantasy points scored vs. wins.
Points_Per_Win <- ggplot(Combined_Team_Stats, aes(x = Total_DKP, y = wins))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Fantasy Points and Wins", x = "Fantasy Points", y = "Wins")+
  geom_point()
Points_Per_Win
## `geom_smooth()` using formula 'y ~ x'

# Fantasy points scored vs. real points scored.
Fantasy_PTS_Per_Actual_Point <- ggplot(Combined_Team_Stats, aes(x = Total_DKP, y = points_for))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Fantasy Points and Points For", x = "Fantasy Points", y = "Points For")+
  geom_point()
Fantasy_PTS_Per_Actual_Point
## `geom_smooth()` using formula 'y ~ x'

# Fantasy points allowed vs. real points allowed. The y aesthetic is
# points_against, so the title and y label say "Points Against".
Fantasy_PTS_Per_Point_Against <- ggplot(Combined_Points_Against, aes(x = Total_DKP, y = points_against))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Fantasy Points Against and Points Against", x = "Fantasy Points Against", y = "Points Against")+
  geom_point()
Fantasy_PTS_Per_Point_Against
## `geom_smooth()` using formula 'y ~ x'

# Pearson correlation between team fantasy points and wins.
cor_wins <- cor.test(
  Combined_Team_Stats$Total_DKP,
  Combined_Team_Stats$wins,
  method = "pearson"
)
cor_wins
## 
##  Pearson's product-moment correlation
## 
## data:  Combined_Team_Stats$Total_DKP and Combined_Team_Stats$wins
## t = 6.6849, df = 30, p-value = 2.092e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5817269 0.8838251
## sample estimates:
##       cor 
## 0.7735171
# NOTE(review): despite its name, this model regresses points_for (not wins)
# on Total_DKP; the wins regression is stored in model_points_for further down.
model_wins <- lm(points_for ~ Total_DKP, data = Combined_Team_Stats)
summary(model_wins)
## 
## Call:
## lm(formula = points_for ~ Total_DKP, data = Combined_Team_Stats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.366 -17.102  -5.118  16.970  60.128 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -99.20128   35.03917  -2.831   0.0082 ** 
## Total_DKP     0.34691    0.03367  10.304 2.27e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25.63 on 30 degrees of freedom
## Multiple R-squared:  0.7797, Adjusted R-squared:  0.7723 
## F-statistic: 106.2 on 1 and 30 DF,  p-value: 2.267e-11
# points_for model: y = 0.3469x - 99.2013, Adjusted R-Squared = 0.7723 (Good)
# Pearson correlation between team fantasy points and real points scored.
cor_points_for <- cor.test(
  Combined_Team_Stats$Total_DKP,
  Combined_Team_Stats$points_for,
  method = "pearson"
)
cor_points_for
## 
##  Pearson's product-moment correlation
## 
## data:  Combined_Team_Stats$Total_DKP and Combined_Team_Stats$points_for
## t = 10.304, df = 30, p-value = 2.267e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7720058 0.9417371
## sample estimates:
##       cor 
## 0.8830013
# NOTE(review): despite its name, this model regresses wins (not points_for)
# on Total_DKP; the points_for regression is stored in model_wins above.
model_points_for <- lm(wins ~ Total_DKP, data = Combined_Team_Stats)
summary(model_points_for)
## 
## Call:
## lm(formula = wins ~ Total_DKP, data = Combined_Team_Stats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5947 -0.9079  0.0239  0.7526  2.2182 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.624356   1.843185  -3.594  0.00115 ** 
## Total_DKP    0.011839   0.001771   6.685 2.09e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.348 on 30 degrees of freedom
## Multiple R-squared:  0.5983, Adjusted R-squared:  0.5849 
## F-statistic: 44.69 on 1 and 30 DF,  p-value: 2.092e-07
# wins model: y = 0.0118x - 6.6244, Adjusted R-Squared = 0.5849

# Pearson correlation between fantasy points allowed and real points allowed.
cor_points_against <- cor.test(
  Combined_Points_Against$Total_DKP,
  Combined_Points_Against$points_against,
  method = "pearson"
)
cor_points_against
## 
##  Pearson's product-moment correlation
## 
## data:  Combined_Points_Against$Total_DKP and Combined_Points_Against$points_against
## t = 6.3442, df = 30, p-value = 5.349e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5546285 0.8747487
## sample estimates:
##       cor 
## 0.7569346
# Linear fit of real points allowed on fantasy points allowed.
model_points_against <- lm(points_against ~ Total_DKP, data = Combined_Points_Against)
summary(model_points_against)
## 
## Call:
## lm(formula = points_against ~ Total_DKP, data = Combined_Points_Against)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -55.896 -14.128   1.545  14.001  47.603 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -35.32768   46.53972  -0.759    0.454    
## Total_DKP     0.28502    0.04493   6.344 5.35e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.9 on 30 degrees of freedom
## Multiple R-squared:  0.573,  Adjusted R-squared:  0.5587 
## F-statistic: 40.25 on 1 and 30 DF,  p-value: 5.349e-07
# y = 0.2850x - 35.3277 R-Squared = 0.5587

# Collect the three team-level correlation estimates into a one-column table.
team_table <- c(
  cor_wins$estimate,
  cor_points_for$estimate,
  cor_points_against$estimate
)
row_names3 <- c("Wins", "Points For", "Points Against")
# The column is named after the object passed in, so keep passing team_table.
team_table <- as.data.frame(team_table, row.names = row_names3)
team_table
##                team_table
## Wins            0.7735171
## Points For      0.8830013
## Points Against  0.7569346
# Rank teams by real points allowed, ascending: the head of worst_teams holds
# the teams that allowed the fewest points, the tail those that allowed the most.
# Ordering uses the data frame's own column rather than the same-named global
# vector, so the result cannot drift if that vector is modified.
worst_teams <- team_records[order(team_records$points_against), ]

# NOTE(review): the *_ten names suggest ten teams, but only five are taken.
# Confirm which was intended.
bottom_ten <- tail(worst_teams, 5)
bottom_ten_teams <- bottom_ten$team
bottom_ten_teams_df <- as.data.frame(bottom_ten_teams)

# Left join, then drop incomplete rows: keeps only rows of df whose opponent is
# one of the selected teams. Rows with NA in any selected column are also dropped.
Vs_bottom_ten <- sqldf("select df.pos, df.pass_att, df.rush_att, df.rec, df.team, df.Opponent_abbrev, df.Total_DKP, bottom_ten_teams_df.bottom_ten_teams
                         from df 
                         left join bottom_ten_teams_df on df.Opponent_abbrev = bottom_ten_teams_df.bottom_ten_teams")
Vs_bottom_ten <- Vs_bottom_ten[complete.cases(Vs_bottom_ten), ]

# Average fantasy points per position, with a minimum-volume filter.
QB_vs_bottom_ten <- sqldf("select pos, avg(Total_DKP) 
                         from Vs_bottom_ten
                         where pass_att >= 10 and pos = 'QB'")
RB_vs_bottom_ten <- sqldf("select pos, avg(Total_DKP) 
                         from Vs_bottom_ten
                         where rush_att >= 5 and pos = 'RB'")
# The position test is parenthesised: in SQL, AND binds tighter than OR, so the
# previous unparenthesised filter admitted every TE row regardless of receptions.
# WR/TE averages in previously rendered output predate this fix and will change
# on the next run.
WR_TE_vs_bottom_ten <- sqldf("select pos, avg(Total_DKP) 
                         from Vs_bottom_ten
                         where rec >= 3 and (pos = 'WR' or pos = 'TE')")

# Same analysis against the five teams that allowed the fewest points.
top_ten <- head(worst_teams, 5)
top_ten_teams <- top_ten$team
top_ten_teams_df <- as.data.frame(top_ten_teams)
Vs_top_ten <- sqldf("select df.pos, df.team, df.Opponent_abbrev, df.pass_att, df.rush_att, df.rec, df.Total_DKP, top_ten_teams_df.top_ten_teams
                         from df 
                         left join top_ten_teams_df on df.Opponent_abbrev = top_ten_teams_df.top_ten_teams")
Vs_top_ten <- Vs_top_ten[complete.cases(Vs_top_ten), ]
QB_vs_top_ten <- sqldf("select pos, avg(Total_DKP) 
                         from Vs_top_ten
                         where pass_att >= 10 and pos = 'QB'")
RB_vs_top_ten <- sqldf("select pos, avg(Total_DKP) 
                         from Vs_top_ten
                         where rush_att >= 5 and pos = 'RB'")
WR_TE_vs_top_ten <- sqldf("select pos, avg(Total_DKP) 
                         from Vs_top_ten
                         where rec >= 3 and (pos = 'WR' or pos = 'TE')")

# One-column tables of the positional averages, rendered side by side.
vs_top_teams <- c(QB_vs_top_ten$`avg(Total_DKP)`, RB_vs_top_ten$`avg(Total_DKP)`, WR_TE_vs_top_ten$`avg(Total_DKP)`)
vs_worst_teams <- c(QB_vs_bottom_ten$`avg(Total_DKP)`, RB_vs_bottom_ten$`avg(Total_DKP)`, WR_TE_vs_bottom_ten$`avg(Total_DKP)`)
row_names4 <- c("QB Fantasy Points", "RB Fantasy Points", "WR/TE Fantasy Points")
positional_top_table <- as.data.frame(vs_top_teams, row.names = row_names4)
positional_worst_table <- as.data.frame(vs_worst_teams, row.names = row_names4)
knitr::kable(list(positional_top_table, positional_worst_table))
vs_top_teams
QB Fantasy Points 13.930000
RB Fantasy Points 12.896296
WR/TE Fantasy Points 6.767038
vs_worst_teams
QB Fantasy Points 18.918909
RB Fantasy Points 13.575810
WR/TE Fantasy Points 8.377643

Part 1 conclusion:

Next I will organize the data by position starting with QB.

# Quarterback rows with more than 15 pass attempts, restricted to the columns
# used in the QB analysis.
QB_df1 <- sqldf(
  "select pos, player, team, pass_cmp, pass_att, pass_yds, pass_td, pass_int, pass_sacked, pass_sacked_yds, pass_long, pass_rating, comb_pass_rush_play, rush_scrambles, comb_pass_play, comb_rush_play, pass_poor_throws, pass_blitzed, pass_hurried, Total_DKP 
             from df
             where pos = 'QB' and pass_att > 15"
)

# Per-player averages of those columns. Result columns carry the raw
# "avg(...)" names produced by the query.
QB_totals <- sqldf(
  "select pos, player, team, avg(pass_cmp), avg(pass_att), avg(pass_yds), avg(pass_td), avg(pass_int), avg(pass_sacked), avg(pass_long), avg(pass_rating), avg(rush_scrambles), avg(comb_pass_play), avg(comb_rush_play), avg(pass_poor_throws), avg(pass_blitzed), avg(pass_hurried), avg(Total_DKP) 
from QB_df1
group by player"
)

Now we must rename the columns so we can use them in SQL.

# Replace the raw "avg(...)" column names with syntactic ones in a single
# rename() call. avg(rush_scrambles) is left untouched, as before.
QB_totals <- QB_totals %>%
  rename(
    "AVG_Cmp" = "avg(pass_cmp)",
    "AVG_Pass_Att" = "avg(pass_att)",
    "AVG_Pass_Yards" = "avg(pass_yds)",
    "AVG_Pass_TD" = "avg(pass_td)",
    "AVG_Pass_Sacked" = "avg(pass_sacked)",
    "AVG_Pass_Long" = "avg(pass_long)",
    "AVG_Pass_Rating" = "avg(pass_rating)",
    "AVG_INT" = "avg(pass_int)",
    "AVG_Pass_Play" = "avg(comb_pass_play)",
    "AVG_Rush_Play" = "avg(comb_rush_play)",
    "AVG_Poor_Passes" = "avg(pass_poor_throws)",
    "AVG_Blitzed" = "avg(pass_blitzed)",
    "AVG_Hurried" = "avg(pass_hurried)",
    "AVG_Total_DKP" = "avg(Total_DKP)"
  )
# Scatter plots of each per-QB average against average fantasy points, each
# with a fitted linear trend and its confidence band.
pass_cmp <- ggplot(QB_totals, aes(x = AVG_Cmp, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Pass Completions and Average Fantasy Points", x = "Average Pass Completions", y = "Average Fantasy Points")+
  geom_point()
pass_cmp
## `geom_smooth()` using formula 'y ~ x'

pass_att <- ggplot(QB_totals, aes(x = AVG_Pass_Att, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Pass Attempts and Fantasy Points", x = "Average Pass Attempts", y = "Average Fantasy Points")+
  geom_point()
pass_att
## `geom_smooth()` using formula 'y ~ x'

pass_yds <- ggplot(QB_totals, aes(x = AVG_Pass_Yards, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Pass Yards and Fantasy Points", x = "Average Pass Yards", y = "Average Fantasy Points")+
  geom_point()
pass_yds
## `geom_smooth()` using formula 'y ~ x'

# x is AVG_Pass_TD, so the labels describe passing touchdowns (they previously
# repeated the pass-attempts labels).
pass_td <- ggplot(QB_totals, aes(x = AVG_Pass_TD, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Passing Touchdowns and Fantasy Points", x = "Average Passing Touchdowns", y = "Average Fantasy Points")+
  geom_point()
pass_td
## `geom_smooth()` using formula 'y ~ x'

pass_sacked <- ggplot(QB_totals, aes(x = AVG_Pass_Sacked, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Times Sacked and Fantasy Points", x = "Average Times Sacked", y = "Average Fantasy Points")+
  geom_point()
pass_sacked
## `geom_smooth()` using formula 'y ~ x'

pass_long <- ggplot(QB_totals, aes(x = AVG_Pass_Long, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Longest Pass and Fantasy Points", x = "Average Longest Pass", y = "Average Fantasy Points")+
  geom_point()
pass_long
## `geom_smooth()` using formula 'y ~ x'

pass_rating <- ggplot(QB_totals, aes(x = AVG_Pass_Rating, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Passer Rating and Fantasy Points", x = "Average Passer Rating", y = "Average Fantasy Points")+
  geom_point()
pass_rating
## `geom_smooth()` using formula 'y ~ x'

comb_pass_play <- ggplot(QB_totals, aes(x = AVG_Pass_Play, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Number of Pass Plays and Fantasy Points", x = "Average Number of Pass Plays", y = "Average Fantasy Points")+
  geom_point()
comb_pass_play
## `geom_smooth()` using formula 'y ~ x'

comb_rush_play <- ggplot(QB_totals, aes(x = AVG_Rush_Play, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Number of Run Plays and Fantasy Points", x = "Average Number of Run Plays", y = "Average Fantasy Points")+
  geom_point()
comb_rush_play
## `geom_smooth()` using formula 'y ~ x'

poor_passes <- ggplot(QB_totals, aes(x = AVG_Poor_Passes, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Poor Passes and Fantasy Points", x = "Average Poor Passes", y = "Average Fantasy Points")+
  geom_point()
poor_passes
## `geom_smooth()` using formula 'y ~ x'

pass_blitzed <- ggplot(QB_totals, aes(x = AVG_Blitzed, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Passes Blitzed and Fantasy Points", x = "Average Passes Blitzed", y = "Average Fantasy Points")+
  geom_point()
pass_blitzed
## `geom_smooth()` using formula 'y ~ x'

pass_hurried <- ggplot(QB_totals, aes(x = AVG_Hurried, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Passes Hurried and Fantasy Points", x = "Average Passes Hurried", y = "Average Fantasy Points")+
  geom_point()
pass_hurried
## `geom_smooth()` using formula 'y ~ x'

# Pearson correlation of every per-QB average with average fantasy points.
cor_avg_cmp <- cor.test(QB_totals$AVG_Cmp, QB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_pass_att <- cor.test(QB_totals$AVG_Pass_Att, QB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_pass_yds <- cor.test(QB_totals$AVG_Pass_Yards, QB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_pass_td <- cor.test(QB_totals$AVG_Pass_TD, QB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_pass_sacked <- cor.test(QB_totals$AVG_Pass_Sacked, QB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_pass_long <- cor.test(QB_totals$AVG_Pass_Long, QB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_pass_rating <- cor.test(QB_totals$AVG_Pass_Rating, QB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_pass_int <- cor.test(QB_totals$AVG_INT, QB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_pass_play <- cor.test(QB_totals$AVG_Pass_Play, QB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_rush_play <- cor.test(QB_totals$AVG_Rush_Play, QB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_poor_pass <- cor.test(QB_totals$AVG_Poor_Passes, QB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_blitzed <- cor.test(QB_totals$AVG_Blitzed, QB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_pass_hurried <- cor.test(QB_totals$AVG_Hurried, QB_totals$AVG_Total_DKP, method = "pearson")

# Estimates and row labels are listed in the same order (attempts first).
QB_table <- c(
  cor_avg_pass_att$estimate,
  cor_avg_cmp$estimate,
  cor_avg_pass_yds$estimate,
  cor_avg_pass_td$estimate,
  cor_avg_pass_sacked$estimate,
  cor_avg_pass_long$estimate,
  cor_avg_pass_rating$estimate,
  cor_avg_pass_int$estimate,
  cor_avg_pass_play$estimate,
  cor_avg_rush_play$estimate,
  cor_avg_poor_pass$estimate,
  cor_avg_blitzed$estimate,
  cor_avg_pass_hurried$estimate
)
row_names2 <- c(
  "Average Attempts",
  "Average Completions",
  "Average Passing Yards",
  "Average Passing Touchdowns",
  "Average Times Sacked",
  "Average Longest Pass",
  "Average Passer Rating",
  "Average Interceptions",
  "Average Passing Plays",
  "Average Rushing Plays",
  "Average Poor Passes",
  "Average Times Blitzed",
  "Average Passes Hurried"
)
# The column is named after the object passed in, so keep passing QB_table.
QB_table <- as.data.frame(QB_table, row.names = row_names2)
QB_table
##                                QB_table
## Average Attempts            0.577609660
## Average Completions         0.551552568
## Average Passing Yards       0.825130809
## Average Passing Touchdowns  0.887081580
## Average Times Sacked        0.103122367
## Average Longest Pass        0.487832083
## Average Passer Rating       0.713660641
## Average Interceptions      -0.149726553
## Average Passing Plays       0.626841441
## Average Rushing Plays       0.302522184
## Average Poor Passes         0.295717554
## Average Times Blitzed      -0.009947472
## Average Passes Hurried      0.053889982
# Passing touchdowns show a higher correlation with fantasy points than
# passing yards do; both are modelled below, starting with passing yards.
model_pass_yds <- lm(AVG_Total_DKP ~ AVG_Pass_Yards, data = QB_totals)
summary(model_pass_yds)
## 
## Call:
## lm(formula = AVG_Total_DKP ~ AVG_Pass_Yards, data = QB_totals)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.5063 -2.3218 -0.1601  1.1393  8.2428 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -3.290576   1.968680  -1.671    0.101    
## AVG_Pass_Yards  0.086959   0.008337  10.430 3.01e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.283 on 51 degrees of freedom
## Multiple R-squared:  0.6808, Adjusted R-squared:  0.6746 
## F-statistic: 108.8 on 1 and 51 DF,  p-value: 3.006e-14
# y = 0.0870x - 3.2906, Adjusted R-Squared = 0.6746 (Above Average)
# Linear fit of average fantasy points on average passing touchdowns.
model_pass_td <- lm(AVG_Total_DKP ~ AVG_Pass_TD, data = QB_totals)
summary(model_pass_td)
## 
## Call:
## lm(formula = AVG_Total_DKP ~ AVG_Pass_TD, data = QB_totals)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.886 -1.685 -0.411  1.740  7.455 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.9655     0.7992   8.716 1.13e-11 ***
## AVG_Pass_TD   7.2347     0.5272  13.724  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.683 on 51 degrees of freedom
## Multiple R-squared:  0.7869, Adjusted R-squared:  0.7827 
## F-statistic: 188.3 on 1 and 51 DF,  p-value: < 2.2e-16
# y = 7.2347x + 6.9655 R-Squared = 0.7827 (Good)

# Among the stats that are not part of the fantasy-point calculation, passer
# rating is the best predictor.
model_pass_rating <- lm(AVG_Total_DKP ~ AVG_Pass_Rating, data = QB_totals)
summary(model_pass_rating)
## 
## Call:
## lm(formula = AVG_Total_DKP ~ AVG_Pass_Rating, data = QB_totals)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.2360  -2.5392   0.2442   1.4613   9.4898 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -6.67816    3.26116  -2.048   0.0457 *  
## AVG_Pass_Rating  0.26759    0.03678   7.276    2e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.071 on 51 degrees of freedom
## Multiple R-squared:  0.5093, Adjusted R-squared:  0.4997 
## F-statistic: 52.94 on 1 and 51 DF,  p-value: 1.998e-09
# y = 0.2676x -6.6782 R-Squared = 0.4997 (Average)
# Running-back rows, restricted to the columns used in the RB analysis.
RB_df1 <- sqldf(
  "select pos, player, team, rush_att, rush_yds, rush_td, rush_long, targets, rec, rec_yds, rec_td, rec_long, fumbles_lost, designed_rush_att, rush_yds_before_contact, rush_yac, rush_broken_tackles, Total_DKP
          from df 
          where pos = 'RB'"
)

# Per-player averages over rows with more than 5 rush attempts, then the raw
# "avg(...)" names are swapped for syntactic ones in a single rename() call.
# Averages that are not renamed here keep their "avg(...)" names, as before.
RB_totals <- sqldf(
  "select pos, player, team, avg(rush_att), avg(rush_yds), avg(rush_td), avg(rush_long), avg(targets), avg(rec), avg(rec_yds), avg(rec_td), avg(rec_long), avg(fumbles_lost), avg(designed_rush_att), avg(rush_yds_before_contact), avg(rush_yac), avg(rush_broken_tackles), avg(Total_DKP)
                  from RB_df1
                  where rush_att > 5
                  group by player"
) %>%
  rename(
    "AVG_Total_DKP" = "avg(Total_DKP)",
    "AVG_Rush_Att" = "avg(rush_att)",
    "AVG_Rush_Yds" = "avg(rush_yds)",
    "AVG_Rush_TD" = "avg(rush_td)",
    "AVG_Rush_Long" = "avg(rush_long)",
    "AVG_Targets" = "avg(targets)",
    "AVG_Rec" = "avg(rec)",
    "AVG_Receiving_Yards" = "avg(rec_yds)",
    "AVG_Yds_Before_Contact" = "avg(rush_yds_before_contact)",
    "AVG_Yds_After_Contact" = "avg(rush_yac)",
    "AVG_Receiving_TD" = "avg(rec_td)",
    "AVG_Broken_Tackles" = "avg(rush_broken_tackles)"
  )
# Scatter plots of each per-RB average against average fantasy points, each
# with a fitted linear trend and its confidence band.
RB_rush_att <- ggplot(RB_totals, aes(x = AVG_Rush_Att, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Rushing Attempts and Fantasy Points", x = "Average Rushing Attempts", y = "Average Fantasy Points")+
  geom_point()
RB_rush_att
## `geom_smooth()` using formula 'y ~ x'

RB_rush_yds <- ggplot(RB_totals, aes(x = AVG_Rush_Yds, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Rushing Yards and Fantasy Points", x = "Average Rushing Yards", y = "Average Fantasy Points")+
  geom_point()
RB_rush_yds
## `geom_smooth()` using formula 'y ~ x'

RB_rush_td <- ggplot(RB_totals, aes(x = AVG_Rush_TD, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Rushing Touchdowns and Fantasy Points", x = "Average Rushing Touchdowns", y = "Average Fantasy Points")+
  geom_point()
RB_rush_td
## `geom_smooth()` using formula 'y ~ x'

RB_rush_long <- ggplot(RB_totals, aes(x = AVG_Rush_Long, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Longest Run and Fantasy Points", x = "Average Longest Run", y = "Average Fantasy Points")+
  geom_point()
RB_rush_long
## `geom_smooth()` using formula 'y ~ x'

RB_targets <- ggplot(RB_totals, aes(x = AVG_Targets, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Targets and Fantasy Points", x = "Average Targets", y = "Average Fantasy Points")+
  geom_point()
RB_targets
## `geom_smooth()` using formula 'y ~ x'

RB_rec <- ggplot(RB_totals, aes(x = AVG_Rec, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Receptions and Fantasy Points", x = "Average Receptions", y = "Average Fantasy Points")+
  geom_point()
RB_rec
## `geom_smooth()` using formula 'y ~ x'

# Object names keep their original spelling so existing references still work;
# only the user-facing labels are corrected to "Receiving".
RB_recieving_yds <- ggplot(RB_totals, aes(x = AVG_Receiving_Yards, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Receiving Yards and Fantasy Points", x = "Average Receiving Yards", y = "Average Fantasy Points")+
  geom_point()
RB_recieving_yds
## `geom_smooth()` using formula 'y ~ x'

RB_recieving_td <- ggplot(RB_totals, aes(x = AVG_Receiving_TD, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Receiving Touchdowns and Fantasy Points", x = "Average Receiving Touchdowns", y = "Average Fantasy Points")+
  geom_point()
RB_recieving_td
## `geom_smooth()` using formula 'y ~ x'

RB_yds_before_contact <- ggplot(RB_totals, aes(x = AVG_Yds_Before_Contact, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Yards Before Contact and Fantasy Points", x = "Average Yards Before Contact", y = "Average Fantasy Points")+
  geom_point()
RB_yds_before_contact
## `geom_smooth()` using formula 'y ~ x'

RB_yac <- ggplot(RB_totals, aes(x = AVG_Yds_After_Contact, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Yards After Contact and Fantasy Points", x = "Average Yards After Contact", y = "Average Fantasy Points")+
  geom_point()
RB_yac
## `geom_smooth()` using formula 'y ~ x'

RB_broken_tackles <- ggplot(RB_totals, aes(x = AVG_Broken_Tackles, y = AVG_Total_DKP))+
  geom_smooth(method = "lm", se = TRUE)+
  labs(title = "Relationship Between Average Broken Tackles and Fantasy Points", x = "Average Broken Tackles", y = "Average Fantasy Points")+
  geom_point()
RB_broken_tackles
## `geom_smooth()` using formula 'y ~ x'

# Pearson correlation of every per-RB average with average fantasy points.
cor_avg_rush_att <- cor.test(RB_totals$AVG_Rush_Att, RB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_rush_yds <- cor.test(RB_totals$AVG_Rush_Yds, RB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_rush_td <- cor.test(RB_totals$AVG_Rush_TD, RB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_rush_long <- cor.test(RB_totals$AVG_Rush_Long, RB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_rb_targets <- cor.test(RB_totals$AVG_Targets, RB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_rb_rec <- cor.test(RB_totals$AVG_Rec, RB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_rb_rec_yds <- cor.test(RB_totals$AVG_Receiving_Yards, RB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_rb_rec_td <- cor.test(RB_totals$AVG_Receiving_TD, RB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_rb_yds_before_contact <- cor.test(RB_totals$AVG_Yds_Before_Contact, RB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_rb_yds_after_contact <- cor.test(RB_totals$AVG_Yds_After_Contact, RB_totals$AVG_Total_DKP, method = "pearson")
cor_avg_rb_broken_tackles <- cor.test(RB_totals$AVG_Broken_Tackles, RB_totals$AVG_Total_DKP, method = "pearson")

# Estimates and row labels are listed in the same order.
RB_table <- c(
  cor_avg_rush_att$estimate,
  cor_avg_rush_yds$estimate,
  cor_avg_rush_td$estimate,
  cor_avg_rush_long$estimate,
  cor_avg_rb_targets$estimate,
  cor_avg_rb_rec$estimate,
  cor_avg_rb_rec_yds$estimate,
  cor_avg_rb_rec_td$estimate,
  cor_avg_rb_yds_before_contact$estimate,
  cor_avg_rb_yds_after_contact$estimate,
  cor_avg_rb_broken_tackles$estimate
)
row_names1 <- c(
  "Average Attempts",
  "Average Rushing Yards",
  "Average Rushing Touchdowns",
  "Average Longest Run",
  "Average Passing Targets",
  "Average Receptions",
  "Average Recieving Yards",
  "Average Recieving Touchdowns",
  "Average Yards Before Contact",
  "Average Yards After Contact",
  "Average Broken Tackles"
)
# The column is named after the object passed in, so keep passing RB_table.
RB_table <- as.data.frame(RB_table, row.names = row_names1)
RB_table
##                               RB_table
## Average Attempts             0.6839729
## Average Rushing Yards        0.7976372
## Average Rushing Touchdowns   0.6288592
## Average Longest Run          0.6273653
## Average Passing Targets      0.7219403
## Average Receptions           0.7517249
## Average Recieving Yards      0.7629032
## Average Recieving Touchdowns 0.4680772
## Average Yards Before Contact 0.7184490
## Average Yards After Contact  0.7387496
## Average Broken Tackles       0.5616254
# Among the stats that count toward fantasy scoring, rushing yards, receiving
# yards, and receptions have the largest correlation with fantasy points.
model_rush_yds <- lm(AVG_Total_DKP ~ AVG_Rush_Yds, data = RB_totals)
summary(model_rush_yds)
## 
## Call:
## lm(formula = AVG_Total_DKP ~ AVG_Rush_Yds, data = RB_totals)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.283 -2.279 -0.928  2.475 10.595 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.86256    0.90709   0.951    0.344    
## AVG_Rush_Yds  0.21783    0.01708  12.754   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.439 on 93 degrees of freedom
## Multiple R-squared:  0.6362, Adjusted R-squared:  0.6323 
## F-statistic: 162.7 on 1 and 93 DF,  p-value: < 2.2e-16
# y = 0.2178x + 0.8626 R-Squared = 0.6323 (Slightly Above Average)
# Linear fit of average fantasy points on average receiving yards (RBs).
model_receiving_yards_rb <- lm(AVG_Total_DKP ~ AVG_Receiving_Yards, data = RB_totals)
summary(model_receiving_yards_rb)
## 
## Call:
## lm(formula = AVG_Total_DKP ~ AVG_Receiving_Yards, data = RB_totals)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.3084 -2.6325 -0.3575  2.1421 13.1407 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           6.0747     0.6099    9.96 2.49e-16 ***
## AVG_Receiving_Yards   0.3414     0.0300   11.38  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.686 on 93 degrees of freedom
## Multiple R-squared:  0.582,  Adjusted R-squared:  0.5775 
## F-statistic: 129.5 on 1 and 93 DF,  p-value: < 2.2e-16
# y = 0.3414x + 6.0747 R-Squared = 0.5775 (Average)
# Linear fit of average fantasy points on average receptions (RBs).
model_rec_rb <- lm(AVG_Total_DKP ~ AVG_Rec, data = RB_totals)
summary(model_rec_rb)
## 
## Call:
## lm(formula = AVG_Total_DKP ~ AVG_Rec, data = RB_totals)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -7.727 -2.642 -0.274  2.210 13.778 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   5.3564     0.6806    7.87 6.33e-12 ***
## AVG_Rec       2.9568     0.2690   10.99  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.76 on 93 degrees of freedom
## Multiple R-squared:  0.5651, Adjusted R-squared:  0.5604 
## F-statistic: 120.8 on 1 and 93 DF,  p-value: < 2.2e-16
# y = 2.9568x + 5.3564 R-Squared = 0.5604 (Average)

# Among the non-fantasy stats, we model yards before contact, yards after contact, and rush attempts.
# Simple linear regression of average fantasy points (AVG_Total_DKP) on
# average yards before contact, fitted on the RB_totals table.
model_rush_before <- lm(
  AVG_Total_DKP ~ AVG_Yds_Before_Contact,
  data = RB_totals
)
summary(model_rush_before)
## 
## Call:
## lm(formula = AVG_Total_DKP ~ AVG_Yds_Before_Contact, data = RB_totals)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.1274 -2.8343 -0.5193  2.6859 10.8394 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             2.18656    1.02158   2.140   0.0349 *  
## AVG_Yds_Before_Contact  0.36065    0.03621   9.961 2.48e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.966 on 93 degrees of freedom
## Multiple R-squared:  0.5162, Adjusted R-squared:  0.511 
## F-statistic: 99.22 on 1 and 93 DF,  p-value: 2.479e-16
# y = 0.3607x + 2.1866 R-Squared = 0.511 (Average)
# Simple linear regression of average fantasy points (AVG_Total_DKP) on
# average yards after contact, fitted on the RB_totals table.
model_rush_after <- lm(
  AVG_Total_DKP ~ AVG_Yds_After_Contact,
  data = RB_totals
)
summary(model_rush_after)
## 
## Call:
## lm(formula = AVG_Total_DKP ~ AVG_Yds_After_Contact, data = RB_totals)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.6379 -2.5878 -0.3584  2.7851  9.9071 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            3.07624    0.89083   3.453 0.000836 ***
## AVG_Yds_After_Contact  0.36640    0.03466  10.570  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.843 on 93 degrees of freedom
## Multiple R-squared:  0.5458, Adjusted R-squared:  0.5409 
## F-statistic: 111.7 on 1 and 93 DF,  p-value: < 2.2e-16
# y = 0.3664x + 3.0762 R-Squared = 0.5409 (Average)
# Simple linear regression of average fantasy points (AVG_Total_DKP) on
# average rush attempts, fitted on the RB_totals table.
model_rush_att <- lm(
  AVG_Total_DKP ~ AVG_Rush_Att,
  data = RB_totals
)
summary(model_rush_att)
## 
## Call:
## lm(formula = AVG_Total_DKP ~ AVG_Rush_Att, data = RB_totals)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.4399 -2.5408 -0.5751  2.5571 11.2939 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.6459     1.4116  -0.458    0.648    
## AVG_Rush_Att   1.0266     0.1135   9.042 2.18e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.159 on 93 degrees of freedom
## Multiple R-squared:  0.4678, Adjusted R-squared:  0.4621 
## F-statistic: 81.75 on 1 and 93 DF,  p-value: 2.179e-14
# y = 1.0266x - 0.6459 R-Squared = 0.4621 (Average)
# Keep only the wide receiver (WR) and tight end (TE) rows of df, restricted
# to the receiving-related columns used below.
WR_TE_df1 <- sqldf("select pos, player, team, targets, rec, rec_yds, rec_td, rec_long, fumbles_lost, designed_rush_att, comb_pass_rush_play, comb_pass_play, Total_DKP
               from df
               where pos = 'WR' or pos = 'TE'")

# Average every stat per player. Rows with 2 or fewer targets are dropped
# before averaging, so they do not contribute to a player's averages.
WR_TE_totals <- sqldf("select pos, player, team, avg(targets), avg(rec), avg(rec_yds), avg(rec_td), avg(rec_long), avg(fumbles_lost), avg(designed_rush_att), avg(comb_pass_rush_play), avg(comb_pass_play), avg(Total_DKP)
                     from WR_TE_df1
                     where targets > 2
                     group by player")

# Give the aggregated columns used later readable names in a single pass.
# avg(fumbles_lost), avg(designed_rush_att) and avg(comb_pass_play) keep the
# raw names produced by the query.
WR_TE_totals <- rename(
  WR_TE_totals,
  AVG_Total_DKP = "avg(Total_DKP)",
  AVG_Targets = "avg(targets)",
  AVG_Rec = "avg(rec)",
  AVG_Rec_Yds = "avg(rec_yds)",
  AVG_Rec_TD = "avg(rec_td)",
  AVG_Rec_Long = "avg(rec_long)",
  AVG_Rec_Passing_Rushing_Plays = "avg(comb_pass_rush_play)"
)
# Scatter plot of average targets against average fantasy points for the
# WR/TE table, drawn over a fitted linear trend line with its error band.
WR_TE_targets <- ggplot(WR_TE_totals, aes(x = AVG_Targets, y = AVG_Total_DKP)) +
  geom_smooth(method = "lm", se = TRUE) +
  geom_point() +
  labs(
    title = "Relationship Between Average Targets and Fantasy Points",
    x = "Average Targets",
    y = "Average Fantasy Points"
  )
WR_TE_targets
## `geom_smooth()` using formula 'y ~ x'

# Scatter plot of average receptions against average fantasy points for the
# WR/TE table, drawn over a fitted linear trend line with its error band.
WR_TE_rec <- ggplot(WR_TE_totals, aes(x = AVG_Rec, y = AVG_Total_DKP)) +
  geom_smooth(method = "lm", se = TRUE) +
  geom_point() +
  labs(
    title = "Relationship Between Average Receptions and Fantasy Points",
    x = "Average Rec",
    y = "Average Fantasy Points"
  )
WR_TE_rec
## `geom_smooth()` using formula 'y ~ x'

# Scatter plot of average receiving yards against average fantasy points for
# the WR/TE table, drawn over a fitted linear trend line with its error band.
# The title and x-axis label use the correct spelling "Receiving".
WR_TE_rec_yds <- ggplot(WR_TE_totals, aes(x = AVG_Rec_Yds, y = AVG_Total_DKP)) +
  geom_smooth(method = "lm", se = TRUE) +
  geom_point() +
  labs(
    title = "Relationship Between Average Receiving Yards and Fantasy Points",
    x = "Average Receiving Yards",
    y = "Average Fantasy Points"
  )
WR_TE_rec_yds
## `geom_smooth()` using formula 'y ~ x'

# Scatter plot of average receiving touchdowns against average fantasy points
# for the WR/TE table, drawn over a fitted linear trend line with its error
# band. The title and x-axis label use the correct spelling "Receiving".
WR_TE_rec_td <- ggplot(WR_TE_totals, aes(x = AVG_Rec_TD, y = AVG_Total_DKP)) +
  geom_smooth(method = "lm", se = TRUE) +
  geom_point() +
  labs(
    title = "Relationship Between Average Receiving Touchdowns and Fantasy Points",
    x = "Average Receiving Touchdowns",
    y = "Average Fantasy Points"
  )
WR_TE_rec_td
## `geom_smooth()` using formula 'y ~ x'

# Scatter plot of the average longest catch against average fantasy points for
# the WR/TE table, drawn over a fitted linear trend line with its error band.
WR_TE_rec_long <- ggplot(WR_TE_totals, aes(x = AVG_Rec_Long, y = AVG_Total_DKP)) +
  geom_smooth(method = "lm", se = TRUE) +
  geom_point() +
  labs(
    title = "Relationship Between Average Longest Catch and Fantasy Points",
    x = "Average Longest Catch",
    y = "Average Fantasy Points"
  )
WR_TE_rec_long
## `geom_smooth()` using formula 'y ~ x'

# Scatter plot of the average combined passing and rushing plays against
# average fantasy points for the WR/TE table, drawn over a fitted linear trend
# line with its error band.
WR_TE_passing_rushing_plays <- ggplot(
  WR_TE_totals,
  aes(x = AVG_Rec_Passing_Rushing_Plays, y = AVG_Total_DKP)
) +
  geom_smooth(method = "lm", se = TRUE) +
  geom_point() +
  labs(
    title = "Relationship Between Average Passing and Running Plays and Fantasy Points",
    x = "Average Passing and Running Plays",
    y = "Average Fantasy Points"
  )
WR_TE_passing_rushing_plays
## `geom_smooth()` using formula 'y ~ x'

# Pearson correlation tests between each averaged WR/TE stat and average
# fantasy points. Only the point estimate of each test is used in the table.
cor_avg_targets <- cor.test(
  WR_TE_totals$AVG_Targets, WR_TE_totals$AVG_Total_DKP,
  method = "pearson"
)
cor_avg_rec <- cor.test(
  WR_TE_totals$AVG_Rec, WR_TE_totals$AVG_Total_DKP,
  method = "pearson"
)
cor_avg_rec_yds <- cor.test(
  WR_TE_totals$AVG_Rec_Yds, WR_TE_totals$AVG_Total_DKP,
  method = "pearson"
)
cor_avg_td <- cor.test(
  WR_TE_totals$AVG_Rec_TD, WR_TE_totals$AVG_Total_DKP,
  method = "pearson"
)
cor_avg_rec_long <- cor.test(
  WR_TE_totals$AVG_Rec_Long, WR_TE_totals$AVG_Total_DKP,
  method = "pearson"
)
cor_avg_pass_rush_plays <- cor.test(
  WR_TE_totals$AVG_Rec_Passing_Rushing_Plays, WR_TE_totals$AVG_Total_DKP,
  method = "pearson"
)

# Collect the correlation estimates into a one-column table with one labelled
# row per stat. The labels are listed in the same order as the estimates.
WR_TE_table <- c(
  cor_avg_targets$estimate,
  cor_avg_rec$estimate,
  cor_avg_rec_yds$estimate,
  cor_avg_td$estimate,
  cor_avg_rec_long$estimate,
  cor_avg_pass_rush_plays$estimate
)
row_name <- c(
  "Average Targets",
  "Average Receptions",
  "Average Receiving Yards",
  "Average Receiving Touchdowns",
  "Average Longest Catch",
  "Average Run and Pass Plays"
)
# row.names is passed by name so the labels cannot be mistaken for another
# argument of as.data.frame().
WR_TE_table <- as.data.frame(WR_TE_table, row.names = row_name)
WR_TE_table
##                              WR_TE_table
## Average Targets                0.8073433
## Average Receptions             0.8496987
## Average Receiving Yards        0.9065176
## Average Receiving Touchdowns   0.6500585
## Average Longest Catch          0.7132301
## Average Run and Pass Plays     0.1365477
# The fantasy statistic with the highest correlation is receiving yards. 
# Simple linear regression of average fantasy points (AVG_Total_DKP) on
# average receiving yards, fitted on the WR_TE_totals table.
model_for_rec_yds_wr_te <- lm(
  AVG_Total_DKP ~ AVG_Rec_Yds,
  data = WR_TE_totals
)
summary(model_for_rec_yds_wr_te)
## 
## Call:
## lm(formula = AVG_Total_DKP ~ AVG_Rec_Yds, data = WR_TE_totals)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.6574 -1.2725 -0.2365  0.8408 12.4250 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.286414   0.263472   4.883 1.86e-06 ***
## AVG_Rec_Yds 0.199654   0.005857  34.087  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.906 on 252 degrees of freedom
## Multiple R-squared:  0.8218, Adjusted R-squared:  0.8211 
## F-statistic:  1162 on 1 and 252 DF,  p-value: < 2.2e-16
# y = 0.1997x + 1.2864 R-Squared = 0.8211 (Good)

# One stat that is not used to calculate fantasy points but is worth considering is targets
# Simple linear regression of average fantasy points (AVG_Total_DKP) on
# average targets, fitted on the WR_TE_totals table.
model_for_target_wr_te <- lm(
  AVG_Total_DKP ~ AVG_Targets,
  data = WR_TE_totals
)
summary(model_for_target_wr_te)
## 
## Call:
## lm(formula = AVG_Total_DKP ~ AVG_Targets, data = WR_TE_totals)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.0300 -1.7669 -0.5047  1.4713  7.8193 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.17380    0.50990  -2.302   0.0221 *  
## AVG_Targets  1.95064    0.08981  21.719   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.664 on 252 degrees of freedom
## Multiple R-squared:  0.6518, Adjusted R-squared:  0.6504 
## F-statistic: 471.7 on 1 and 252 DF,  p-value: < 2.2e-16
# y = 1.9506x - 1.1738 R-Squared = 0.6504 (Above Average)
# Simple linear regression of average fantasy points (AVG_Total_DKP) on the
# average longest catch, fitted on the WR_TE_totals table.
model_long_rec_wr_te <- lm(
  AVG_Total_DKP ~ AVG_Rec_Long,
  data = WR_TE_totals
)
summary(model_long_rec_wr_te)
## 
## Call:
## lm(formula = AVG_Total_DKP ~ AVG_Rec_Long, data = WR_TE_totals)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.821 -1.872 -0.399  1.818 14.251 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.71044    0.50945   3.357 0.000908 ***
## AVG_Rec_Long  0.39444    0.02442  16.153  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.165 on 252 degrees of freedom
## Multiple R-squared:  0.5087, Adjusted R-squared:  0.5067 
## F-statistic: 260.9 on 1 and 252 DF,  p-value: < 2.2e-16
# y = 0.3944x + 1.7104 R-Squared = 0.5067 (Average)

Part 2 conclusion:

When selecting a fantasy team:

At QB use:

At RB, the most reliable model is:

For WR and TE the best prediction model is:

Overall, this project should help fantasy players identify which specific stats to look at when selecting players who are likely to score well in the future.